import requests
from lxml import etree
import  time
def single(pages=40, page_size=20, output_path='../venv/shuju.txt', timeout=10):
    """Scrape the school list pages sequentially and save name/link pairs.

    Requests ``pages`` consecutive result pages from the search endpoint,
    advancing the ``start`` query parameter by ``page_size`` each time.
    For every entry found, the school name and its absolute link are
    printed and written to ``output_path`` as one ``name:href`` line.

    Args:
        pages: Number of result pages to request.
        page_size: Amount added to the ``start`` offset for each page.
        output_path: File that receives the results (overwritten).
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: If a request fails, times out, or
            returns an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'
    }
    url = 'https://yz.chsi.com.cn/sch/search.do?'
    base_url = 'https://yz.chsi.com.cn'
    anchor_xpath = './/a[@class="name js-yxk-yxmc text-decoration-none"]'

    # A single session reuses the underlying connection across all pages.
    with open(output_path, 'w', encoding='utf-8') as file, \
            requests.Session() as session:
        session.headers.update(headers)
        for page in range(pages):
            response = session.get(
                url,
                params={'start': page * page_size},
                timeout=timeout,
            )
            # Fail loudly on HTTP errors instead of parsing an error page
            # and silently recording nothing for it.
            response.raise_for_status()

            tree = etree.HTML(response.text)
            # The parser may yield no tree for an empty document.
            div_list = [] if tree is None else tree.xpath(
                '//div[@class="sch-list-container"]/div')

            for div in div_list:
                names = div.xpath(anchor_xpath + '/text()')
                hrefs = div.xpath(anchor_xpath + '/@href')
                # Skip container children that are not school entries
                # rather than crashing on an empty XPath result.
                if not names or not hrefs:
                    continue
                name_data = names[0].strip()
                href_data = base_url + hrefs[0]
                print(name_data, href_data)
                file.write(f'{name_data}:{href_data}\n')
            print(f'------{page + 1}------')

if __name__ == '__main__':
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring elapsed intervals; time.time() can jump if the system
    # clock is adjusted during the run.
    started = time.perf_counter()
    single()
    elapsed = time.perf_counter() - started
    print("single_cost", elapsed)


